This notebook runs the bimodality test for organ-sex conditions with at least (10) replicates and summarizes the results.

Setup

Parameters

print(params)
## $input.samples.rds.path
## [1] "../../results/01_Filter_Samples"
## 
## $input.variability.rds.path
## [1] "../../results/03_Variability_Plots"
## 
## $output.rds.path
## [1] "../../results/05_Bimodality_Test"
## 
## $species
## [1] "DRE"
## 
## $species.name
## [1] "Danio rerio"
## 
## $min.replicates
## [1] 10

Libraries

# Install BimodalIndex
packages <- c("dplyr","tibble","matrixStats","BimodalIndex","ggplot2","ggpubr")

for (p in packages) {
  if (!require(p, character.only=TRUE)) {
    print(paste("Installing", p))
    install.packages(p, character.only=TRUE)
    renv::snapshot()
  }
  print(paste("Loading", p))
  library(p, character.only=TRUE)
}
## [1] "Loading dplyr"
## [1] "Loading tibble"
## [1] "Loading matrixStats"
## [1] "Loading BimodalIndex"
## [1] "Loading ggplot2"
## [1] "Loading ggpubr"

Functions

source("../functions/bimodality_test.R")
source("../functions/bimodality_test_plots.R")

Session Info

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 11.7.8
## 
## Matrix products: default
## LAPACK: /Users/cbucao/miniforge3/envs/fish-ev-gene-function/lib/libopenblasp-r0.3.25.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices datasets  utils     methods   base     
## 
## other attached packages:
##  [1] BimodalIndex_1.1.9   reshape2_1.4.4       tibble_3.1.7         tidyr_1.2.1          cowplot_1.1.1       
##  [6] ggpubr_0.4.0         slider_0.2.2         viridis_0.6.3        viridisLite_0.4.1    gplots_3.1.3        
## [11] ggplot2_3.3.6        matrixStats_0.62.0   edgeR_3.34.1         limma_3.48.3         colorBlindness_0.1.9
## [16] dplyr_1.0.9         
## 
## loaded via a namespace (and not attached):
##  [1] oompaBase_3.2.9     sass_0.4.6          splines_4.1.0       jsonlite_1.8.9      carData_3.0-5      
##  [6] warp_0.2.0          gtools_3.9.4        bslib_0.5.0         BiocManager_1.30.21 highr_0.10         
## [11] yulab.utils_0.1.8   renv_0.17.3         yaml_2.3.7          pillar_1.7.0        backports_1.4.1    
## [16] lattice_0.21-8      glue_1.8.0          digest_0.6.37       ggsignif_0.6.4      colorspace_2.1-0   
## [21] plyr_1.8.9          Matrix_1.5-4.1      htmltools_0.5.5     pkgconfig_2.0.3     broom_1.0.5        
## [26] purrr_0.3.5         tidytree_0.4.6      scales_1.2.1        mgcv_1.8-42         generics_0.1.3     
## [31] farver_2.1.1        car_3.1-2           ellipsis_0.3.2      cachem_1.0.8        withr_3.0.2        
## [36] lazyeval_0.2.2      cli_3.6.3           mclust_6.1.1        magrittr_2.0.3      crayon_1.5.3       
## [41] evaluate_1.0.1      fs_1.6.5            fansi_1.0.4         MASS_7.3-58.3       nlme_3.1-162       
## [46] rstatix_0.7.2       tools_4.1.0         lifecycle_1.0.4     stringr_1.5.0       munsell_0.5.0      
## [51] locfit_1.5-9.7      cluster_2.1.6       isoband_0.2.7       compiler_4.1.0      jquerylib_0.1.4    
## [56] caTools_1.18.2      gridGraphics_0.5-1  rlang_1.1.4         grid_4.1.0          bitops_1.0-7       
## [61] labeling_0.4.2      rmarkdown_2.22      gtable_0.3.3        abind_1.4-5         DBI_1.1.3          
## [66] R6_2.5.1            gridExtra_2.3       knitr_1.43          fastmap_1.1.1       utf8_1.2.3         
## [71] treeio_1.16.2       KernSmooth_2.23-21  ape_5.8             stringi_1.7.6       Rcpp_1.0.10        
## [76] vctrs_0.4.1         tidyselect_1.2.0    xfun_0.39

Data

Expression levels

# Load saved data from 01_Filter_Samples.Rmd
# Normalized within-condition
filtered.samples <- readRDS(
  file.path(params$input.samples.rds.path,
            paste(params$species, "4_reps_data.conditions.rds", sep="_")))

Expression variability

# Load processed expression variability estimates from 03_Variability_Plots.Rmd
jack.bind <- readRDS(
  file.path(params$input.variability.rds.path,
            paste(params$species, "jack.resid.cv.bind.rds", sep="_")))

Main

Data processing

Select conditions with minimum replicates

filtered.samples.retained <- list()

filtered.samples.retained$conditions <- filtered.samples$conditions %>%
  dplyr::filter(n>=params$min.replicates)

filtered.samples.retained$metadata <- filtered.samples$metadata %>%
  dplyr::filter(grepl(paste(filtered.samples.retained$conditions$regex,collapse="|"),.$SampleName))

if ("Sex" %in% colnames(filtered.samples.retained$conditions)) {
  conditions <- 
    paste(filtered.samples.retained$conditions$Tissue, 
          filtered.samples.retained$conditions$Sex, sep="_")
} else {
  conditions <- filtered.samples.retained$conditions$Tissue
}

filtered.samples.retained$log2.tmm.cpm.gf <- vector(mode="list", length=length(conditions))
names(filtered.samples.retained$log2.tmm.cpm.gf) <- conditions

jack.condition.retained <- vector(mode="list", length=length(conditions))
names(jack.condition.retained) <- conditions

for (c in 1:length(conditions)) {
  if ("Sex" %in% colnames(filtered.samples.retained$conditions)) {
    t <- filtered.samples.retained$conditions$Tissue[[c]]
    s <- filtered.samples.retained$conditions$Sex[[c]]
    
    ts <- paste(t,s,sep="_")
    
    filtered.samples.retained$log2.tmm.cpm.gf[[ts]] <- filtered.samples$log2.tmm.cpm.gf[[t]][[s]]
    
    jack.condition.retained[[ts]] <- jack.bind %>%
      dplyr::filter(Biotype=="protein_coding") %>%
      dplyr::filter(paste(.$Tissue, .$Sex, sep="_")==ts)
    
  } else {
    ts <- filtered.samples.retained$conditions$Tissue[[c]]
    
    filtered.samples.retained$log2.tmm.cpm.gf[[ts]] <- filtered.samples$log2.tmm.cpm.gf[[ts]]
    
    jack.condition.retained[[ts]] <- jack.bind %>%
      dplyr::filter(Biotype=="protein_coding") %>%
      dplyr::filter(Tissue==ts)
  }
  
  # Filter gene list
  filtered.samples.retained$log2.tmm.cpm.gf[[ts]] <- filtered.samples.retained$log2.tmm.cpm.gf[[ts]] %>%
    .[rownames(.) %in% jack.condition.retained[[ts]]$GeneID,]
}

Compute z-score per condition

zscore.matrix <- lapply(setNames(conditions, conditions), function(ts) {
  zscore(filtered.samples.retained$log2.tmm.cpm.gf[[ts]])
})

Compute bimodality test per condition

bimodality.test <- lapply(setNames(conditions, conditions), function(ts) {
  print(ts)
  bimodalIndex(zscore.matrix[[ts]]) %>%
    tibble::rownames_to_column(var="GeneID")
})
## [1] "brain_F"
## 1 ..........
## 2 ..........
## 3 ..........
## 4 ..........
## 5 ..........
## 6 ..........
## 7 ..........
## 8 ..........
## 9 ..........
## 10 ..........
## 11 ..........
## 12 ..........
## 13 ..........
## 14 .........
## [1] "brain_M"
## 1 ..........
## 2 ..........
## 3 ..........
## 4 ..........
## 5 ..........
## 6 ..........
## 7 ..........
## 8 ..........
## 9 ..........
## 10 ..........
## 11 ..........
## 12 ..........
## 13 ..........
## 14 ......
## [1] "eye_F"
## 1 ..........
## 2 ..........
## 3 ..........
## 4 ..........
## 5 ..........
## 6 ..........
## 7 ..........
## 8 ..........
## 9 ..........
## 10 ..........
## 11 ..........
## 12 ..........
## 13 ..........
## 14 .....
## [1] "eye_M"
## 1 ..........
## 2 ..........
## 3 ..........
## 4 ..........
## 5 ..........
## 6 ..........
## 7 ..........
## 8 ..........
## 9 ..........
## 10 ..........
## 11 ..........
## 12 ..........
## 13 ..........
## 14 .......
## [1] "gonads_F"
## 1 ..........
## 2 ..........
## 3 ..........
## 4 ..........
## 5 ..........
## 6 ..........
## 7 ..........
## 8 ..........
## 9 ..........
## 10 ..........
## 11 ..........
## 12 ..........
## 13 ....
## [1] "intestine_F"
## 1 ..........
## 2 ..........
## 3 ..........
## 4 ..........
## 5 ..........
## 6 ..........
## 7 ..........
## 8 ..........
## 9 ..........
## 10 ..........
## 11 ..........
## 12 ..........
## 13 ....
## [1] "intestine_M"
## 1 ..........
## 2 ..........
## 3 ..........
## 4 ..........
## 5 ..........
## 6 ..........
## 7 ..........
## 8 ..........
## 9 ..........
## 10 ..........
## 11 ..........
## 12 ..........
## 13 
## [1] "liver_F"
## 1 ..........
## 2 ..........
## 3 ..........
## 4 ..........
## 5 ..........
## 6 ..........
## 7 ..........
## 8 ..........
## 9 ....
## [1] "pectoral_fin_F"
## 1 ..........
## 2 ..........
## 3 ..........
## 4 ..........
## 5 ..........
## 6 ..........
## 7 ..........
## 8 ..........
## 9 ..........
## 10 ..........
## 11 ..........
## 12 ..........
## 13 .......
## [1] "pectoral_fin_M"
## 1 ..........
## 2 ..........
## 3 ..........
## 4 ..........
## 5 ..........
## 6 ..........
## 7 ..........
## 8 ..........
## 9 ..........
## 10 ..........
## 11 ..........
## 12 ..........
## 13 ........
## [1] "skin_F"
## 1 ..........
## 2 ..........
## 3 ..........
## 4 ..........
## 5 ..........
## 6 ..........
## 7 ..........
## 8 ..........
## 9 ..........
## 10 ..........
## 11 ..........
## 12 ..........
## 13 ..........
## 14 
## [1] "skin_M"
## 1 ..........
## 2 ..........
## 3 ..........
## 4 ..........
## 5 ..........
## 6 ..........
## 7 ..........
## 8 ..........
## 9 ..........
## 10 ..........
## 11 ..........
## 12 ..........
## 13 .......

Bin variability ranks

for (ts in conditions) {
  bimodality.test[[ts]]$VarRank <-
    vapply(bimodality.test[[ts]]$GeneID, FUN.VALUE=numeric(1), FUN=function(g) {
      jack.condition.retained[[ts]]$Mean_Local_Rank_Log2CV[jack.condition.retained[[ts]]$GeneID==g]
    })
  
  # Group variability ranks into bins of size 0.1
  bimodality.test[[ts]]$VarRankBin <- round(bimodality.test[[ts]]$VarRank, 1)
}

Plots

Boxplots - Bimodality index (BI) by variability rank

# Bind data frames for plotting
bimodality.test.bind <- dplyr::bind_rows(bimodality.test, .id="Condition")

Landscape

boxplot_bimodality_by_variability(bimodality.test.bind, title=params$species.name, facet=TRUE)

Portrait

boxplot_bimodality_by_variability(bimodality.test.bind, title=params$species.name, facet=TRUE)

Expression level z-scores by variability rank bin

Question: At which variability rank does the z-score distribution visually appear bimodal?

All

# Will only print last plot to HTML, but works on RStudio
for (ts in conditions) {
  plot_density_zscore(zscore.matrix[[ts]], bimodality.test[[ts]], title=ts)
}

Example: Brain

if ("brain" %in% filtered.samples.retained$conditions$Tissue) {
  if ("Sex" %in% colnames(filtered.samples.retained$conditions)) {
    ts <- "brain_F"
  } else { ts <- "brain" }

  plot_density_zscore(zscore.matrix[[ts]], bimodality.test[[ts]], title=ts)  
}

Example: Gonads

if ("gonads" %in% filtered.samples.retained$condtions$Tissue) {
  if ("Sex" %in% colnames(filtered.samples.retained$conditions)) {
    ts <- "gonads_F"
  } else { ts <- "gonads" }

  plot_density_zscore(zscore.matrix[[ts]], bimodality.test[[ts]], title=ts)  
}

Save

if (!dir.exists(params$output.rds.path)) { dir.create(params$output.rds.path, showWarnings=FALSE) }

saveRDS(filtered.samples.retained,
        file.path(params$output.rds.path, 
                  paste(params$species, "filtered.samples.retained.rds", sep="_")))

saveRDS(jack.condition.retained,
        file.path(params$output.rds.path, 
                  paste(params$species, "jack.condition.retained.rds", sep="_")))

saveRDS(zscore.matrix,
        file.path(params$output.rds.path, 
                  paste(params$species, "zscore.matrix.rds", sep="_")))

saveRDS(bimodality.test,
        file.path(params$output.rds.path, 
                  paste(params$species, "bimodality.test.rds", sep="_")))

saveRDS(bimodality.test.bind,
        file.path(params$output.rds.path, 
                  paste(params$species, "bimodality.test.bind.rds", sep="_")))